# Data parsing with bs4 (BeautifulSoup)
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

if __name__ == '__main__':
    # Fetch the book's table-of-contents page, follow every chapter link it
    # lists, and append each chapter to ./sanguo.txt as "title:content".
    url = 'https://www.shicimingju.com/book/sanguoyanyi.html'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.76'
    }
    # requests never times out on its own; without this a stalled connection
    # would hang the whole run.
    timeout = 30

    index_response = requests.get(url=url, headers=headers, timeout=timeout)
    # Abort on an HTTP error status instead of parsing an error page.
    index_response.raise_for_status()

    # The raw bytes are handed over as-is so BeautifulSoup detects the encoding.
    soup = BeautifulSoup(index_response.content, 'lxml')
    # Each <li> of the table of contents carries the link to one chapter page.
    li_list = soup.select('.book-mulu > ul > li')

    saved = 0
    # The context manager closes (and flushes) the output file even when a
    # request or parsing step raises partway through the chapter list.
    with open('./sanguo.txt', 'w', encoding='utf-8') as fp:
        for li in li_list:
            link = li.a
            if link is None or not link.get('href'):
                # No usable link in this entry, so there is no chapter to fetch.
                continue

            # get_text() always returns a str, whereas .string is None when
            # the anchor has more than one child node.
            title = link.get_text()

            # urljoin resolves root-relative, page-relative and absolute hrefs
            # against the table-of-contents URL.
            detail_url = urljoin(url, link['href'])
            print(detail_url)

            # Request the chapter's detail page and decode it as UTF-8.
            detail_response = requests.get(
                url=detail_url, headers=headers, timeout=timeout
            )
            detail_response.raise_for_status()
            detail_html = detail_response.content.decode('utf-8')

            detail_soup = BeautifulSoup(detail_html, 'lxml')
            div_tag = detail_soup.find('div', class_='chapter_content')
            if div_tag is None:
                # Name the offending page instead of failing later with an
                # opaque AttributeError on None.
                raise RuntimeError(
                    'no chapter_content div found at ' + detail_url
                )

            fp.write(title + ':' + div_tag.text + '\n')

            saved += 1
            print(title)
            print('成功' + str(saved))
